library(knitr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(lubridate)
library(here)
## here() starts at C:/Users/la_pa/OneDrive/Documentos/R/Portfolio_DS241
library(sf)
## Warning: package 'sf' was built under R version 4.3.2
## Linking to GEOS 3.11.2, GDAL 3.7.2, PROJ 9.3.0; sf_use_s2() is TRUE
library(tmap)
## Warning: package 'tmap' was built under R version 4.3.2
## Breaking News: tmap 3.x is retiring. Please test v4, e.g. with
## remotes::install_github('r-tmap/tmap')
library(tidycensus)
## Warning: package 'tidycensus' was built under R version 4.3.2
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.3.2
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
Import Dataset
neigh = st_read(here("data_raw","DC_Health_Planning_Neighborhoods.geojson")) %>% clean_names()
## Reading layer `DC_Health_Planning_Neighborhoods' from data source
## `C:\Users\la_pa\OneDrive\Documentos\R\Portfolio_DS241\data_raw\DC_Health_Planning_Neighborhoods.geojson'
## using driver `GeoJSON'
## Simple feature collection with 51 features and 8 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -77.11976 ymin: 38.79165 xmax: -76.9094 ymax: 38.99556
## Geodetic CRS: WGS 84
plot(neigh)

Investigating Joining Spatial and Non-Spatial Data
dc_c = read_csv(here("data_raw","DC_COVID-19_Total_Positive_Cases_by_Neighborhood.csv")) %>% clean_names()
## Rows: 26372 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): DATE_REPORTED, NEIGHBORHOOD
## dbl (2): OBJECTID, TOTAL_POSITIVES
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_cases=dc_c %>%
filter(as_date(date_reported) == "2021-11-17") %>%
separate(neighborhood,into=c("code","name"),sep = ":") %>%
mutate(code=case_when(code=="N35" ~"N0",
TRUE ~ code)) %>%
select(-date_reported)
neigh2 = left_join(neigh, df_cases, by=c("code"))
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(neigh2) +tm_polygons("total_positives", alpha = .5)
df_census = get_acs(geography = "tract",
variables = c("median_inc"="B06011_001",
"pop" = "B01003_001E",
"pop_black" = "B02009_001"),
state = "DC", geometry = TRUE, year = 2021)
## Getting data from the 2017-2021 5-year ACS
## Warning: • You have not set a Census API key. Users without a key are limited to 500
## queries per day and may experience performance limitations.
## ℹ For best results, get a Census API key at
## http://api.census.gov/data/key_signup.html and then supply the key to the
## `census_api_key()` function to use it throughout your tidycensus session.
## This warning is displayed once per session.
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
##
|
| | 0%
|
|========================= | 36%
|
|======================================= | 56%
|
|================================================================== | 94%
|
|======================================================================| 100%
class(df_census)
## [1] "sf" "data.frame"
plot(df_census)

dc_cases_filtered <- dc_c %>%
filter(as.Date(date_reported) == as.Date("2021-11-17")) %>%
separate(neighborhood, into = c("code", "name"), sep = ":") %>%
mutate(code = case_when(code == "N35" ~ "N0", TRUE ~ code)) %>%
select(-date_reported)
df_combined <- left_join(dc_cases_filtered, df_census, by = c("code" = "GEOID"))
neigh_with_crime <- left_join(neigh, df_combined, by = "code")
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(neigh_with_crime) +
tm_bubbles(size = "total_positives", col = "total_positives", alpha = 0.7) +
tm_borders() +
tm_layout(title = "Total Positives and Demographic Data by Neighborhood")
## Legend for symbol sizes not available in view mode.
tm_shape(neigh_with_crime) +
tm_bubbles(size = "total_positives", col = "total_positives", alpha = 0.7) +
tm_borders() +
tm_shape(df_census) +
tm_fill("estimate", style = "cont") + # Use style "cont" for continuous variables
tm_borders() +
tm_layout(title = "Estimates: District of Columbia 2021")
## Legend for symbol sizes not available in view mode.
dc_c$date_reported <- as.Date(dc_c$date_reported)
neighborhood_totals <- dc_c %>%
group_by(neighborhood) %>%
summarize(total_positives = sum(total_positives, na.rm = TRUE)) %>%
filter(!is.na(neighborhood) & !grepl("^unknown$", neighborhood, ignore.case = TRUE))
sorted_neighborhoods <- neighborhood_totals %>% arrange(total_positives)
cat("Neighborhoods with the Least Amount of Total Positives:\n")
## Neighborhoods with the Least Amount of Total Positives:
least_5 <- head(sorted_neighborhoods, 5)
for (i in 1:5) {
cat("\"", least_5$neighborhood[i], "\": ", least_5$total_positives[i], "\n", sep = "")
}
## "N24: GWU/ FOGGY BOTTOM": 189
## "N24: GWU": 13806
## "N35: NATIONAL MALL": 23835
## "N36: NAVAL STATION & AIR FORCE": 35027
## "N15: DC MEDICAL CENTER": 62004
cat("\n")
cat("Neighborhoods with the Most Amount of Total Positives:\n")
## Neighborhoods with the Most Amount of Total Positives:
most_5 <- tail(sorted_neighborhoods, 5)
for (i in 1:5) {
cat("\"", most_5$neighborhood[i], "\": ", most_5$total_positives[i], "\n", sep = "")
}
## "N43: SW/WATERFRONT": 619367
## "N12: CHINATOWN": 619941
## "N7: BRIGHTWOOD": 632259
## "N1: 16th ST HEIGHTS": 693368
## "N13: COLUMBIA HEIGHTS": 891102